--- title: Where to Find the Best Beer in the US author: Chad Peltier date: '2020-12-07' slug: where-to-find-the-best-beer-in-the-us categories: - R tags: - Geospatial ---
The amazing TidyTuesday project has listed two beer-focused datasets this year, one focused on beer production by state, and the other on Great American Beer Fest (GABF) awards.
From those, I was originally interested in joining the GABF awards data with beer reviews data from BeerAdvocate… but joining the data (off of brewery and beer names) has turned out to be messy and time consuming. So in the meantime, I wanted to share some data just from scraping Beer Advocate reviews.
Scraping was a 3-part process:
Overall, I collected the following data for each beer:
library(tidytuesdayR)
library(rvest)
library(janitor)
library(tidyverse)
library(sf)
library(leaflet)
library(patchwork)
library(tidytext)
Scraping the styles information was straightforward – go to the styles page, find the style html nodes, and pull the linked url from the attributes.
Each style reviews page has a unique identifier number. I then created a dataframe that combined all of the style ID numbers with the page numbers of reviews. For example, page 2 of the American Barleywines style page is “https://www.beeradvocate.com/beer/styles/19/?sort=revsD&start=50” – so I repeated the ending digits in steps of 50 from 0 to 5000 in order to get the 5000 most-reviewed beers per substyle.
It would have been possible to pull either all the beers on BA or to pull it based on average rating, but since I was only interested in beers with more than 10 reviews (and I ended up collecting 4x the number of beers than I ended up using because they had fewer than 10 reviews, anyway) this strategy worked fine.
styles <- read_html(paste0("https://www.beeradvocate.com/beer/styles/"))
style_nums <- styles %>%
html_nodes("a") %>%
html_attr("href") %>%
tibble() %>%
rename("links" = 1) %>%
filter(str_detect(links, "styles/")) %>%
mutate(style_num = parse_number(links)) %>%
drop_na(style_num)
pages <- seq(from = 0, to = 5000, by = 50)
style_nums2 <- rep(style_nums$style_num, times = length(pages)) %>%
enframe() %>%
arrange(value) %>%
pull(value)
pages2 <- rep(pages, times = (length(style_nums2) / length(pages)))
style_pages <- tibble(style_nums2, pages2)
Next came scraping the tables from the individual style pages. It consisted of pulling the table itself and then adding in columns for the link to the individual beer pages as well as the beer substyle. I wrapped all of those pulls into a function that I then purrr::map2’d on the style_pages dataframe I created above (which contained all possible substyle review page urls).
get_reviews_style <- function(style_num, page){
url <- read_html(paste0("https://www.beeradvocate.com/beer/styles/",
style_num,
"/?sort=revD&start=", page) )
stats <- url %>%
html_nodes("table") %>%
html_table(fill = TRUE) %>%
tibble() %>%
unnest(".") %>%
slice(-c(1:2)) %>%
row_to_names(1) %>%
remove_empty() %>%
slice(1:n()-1)
style <- url %>%
html_node("h1") %>%
html_text()
beer_links <- url %>%
html_nodes("a") %>%
html_attr("href") %>%
tibble() %>%
rename("links" = 1) %>%
filter(str_detect(links, "\\/beer\\/profile\\/\\d+\\/\\d+\\/")) %>%
mutate(links = paste0("https://beeradvocate.com", links))
stats <- stats %>%
mutate(beer_style = style) %>%
bind_cols(beer_links)
}
top_beers <- map2(style_pages$style_nums2, style_pages$pages2,
~ get_reviews_style(.x, .y)) %>%
bind_rows()
## Clean and filter to only beers with > 20 ratings, adds broad style and substyle cols
top_beers2 <- top_beers %>%
clean_names() %>%
mutate(broad_style = str_extract(beer_style, ".+(?= - )"),
sub_style = str_extract(beer_style, "(?<= - ).+"),
broad_style = if_else(is.na(broad_style), beer_style, broad_style),
sub_style = if_else(is.na(sub_style), beer_style, sub_style),
brewery = str_remove(brewery, "-.+"),
brewery = str_remove(brewery, " & Tasting Room"),
brewery = str_remove(brewery, " & Grill"),
brewery = str_remove(brewery, " & Kitchen"),
brewery = str_remove(brewery, " & Brewpub"),
brewery = str_remove(brewery, " Co\\."),
brewery = str_remove(brewery, " Company"),
brewery = str_trim(brewery),
across(c(abv, ratings,avg), as.numeric)) %>%
filter(ratings > 10) %>%
mutate(group_num = rep(1:10, each = 4692, length.out = nrow(.)))
Finally, there was a little more data from the beers’ individual review pages that I also pulled. This included the brewery’s state or country and the BA score (as opposed to BA users’ average ratings).
There is also code below to scrape the most recent 25 user reviews text, but I didn’t end up running that code section for the analysis below, since pulling all of the data took > 6 hours as it was. I also had to break up that purrr::map() function into 5 separate pulls (although it’s not reflected in the code chunk below) because I kept getting timeout errors from rvest connecting with the BA site.
get_more_beer_info <- function(url){
## Brewery state, BA score
state_names <- state.name
country_names <- str_trim(ISOcodes::UN_M.49_Countries$Name)
info <- read_html(url) %>%
html_nodes("div") %>%
html_text() %>%
enframe() %>%
mutate(value = str_remove_all(value, "\n"),
value = str_replace(value, "Avail", " Avail")) %>%
filter(str_detect(value, "^Beer Geek Stats:")) %>%
mutate(state = str_extract(value, paste(state_names, collapse = "|")),
country = str_extract(value, paste(country_names, collapse = "|")),
score = str_extract(value, "(?<=Score:)\\d+"),
links = url)
info # comment out if including user reviews
## Most recent 25 user reviews
reviews <- read_html(url) %>%
html_nodes("#rating_fullview_content_2") %>%
html_text() %>%
enframe() %>%
mutate(value = str_remove_all(value, ".+(?=overall:)"),
value = str_remove_all(value, "\n")) %>%
summarize(reviews = paste(value, collapse = "|")) %>%
mutate(links = url)
info %>%
left_join(reviews, by = "links")
}
## map across all beers in top_beers2 df
more_beer_info <- map_df(top_beers2$links, get_more_beer_info)
## join with top_beers2 df
top_beers3 <- top_beers2 %>%
left_join(more_beer_info, by = "links") %>%
mutate(score = as.numeric(score))
Here are basic counts for how many beers we have in the final data by both broad style and substyle. LOTS of IPAs, although that shoudl surprise no one :) The top four substyles are all IPAs or American pales.
top_beers3 %>%
count(broad_style, sort = TRUE) %>%
head(20)
## # A tibble: 20 x 2
## broad_style n
## <chr> <int>
## 1 IPA 12269
## 2 Stout 5386
## 3 Lager 3945
## 4 Pale Ale 3537
## 5 Wheat Beer 2156
## 6 Farmhouse Ale 1980
## 7 Porter 1952
## 8 Wild Ale 1564
## 9 Red Ale 1453
## 10 Pilsner 1300
## 11 Sour 1270
## 12 Brown Ale 1069
## 13 Strong Ale 1039
## 14 Fruit and Field Beer 931
## 15 Blonde Ale 859
## 16 Bock 634
## 17 Barleywine 573
## 18 Bitter 548
## 19 Tripel 487
## 20 Kölsch 430
top_beers3 %>%
count(beer_style, sort = TRUE) %>%
head(20)
## # A tibble: 20 x 2
## beer_style n
## <chr> <int>
## 1 IPA - American 4901
## 2 IPA - Imperial 3794
## 3 Pale Ale - American 2695
## 4 IPA - New England 2402
## 5 Stout - American Imperial 1981
## 6 Farmhouse Ale - Saison 1829
## 7 Wild Ale 1564
## 8 Red Ale - American Amber / Red 1116
## 9 Porter - American 1032
## 10 Fruit and Field Beer 931
## 11 Stout - American 875
## 12 Stout - Sweet / Milk 786
## 13 Stout - Russian Imperial 765
## 14 Pilsner - German 762
## 15 Blonde Ale - American 749
## 16 Wheat Beer - Hefeweizen 685
## 17 Wheat Beer - Witbier 662
## 18 Sour - Berliner Weisse 652
## 19 Brown Ale - American 645
## 20 Wheat Beer - American Pale 586
First, let’s take a look at the raw number and percent of each style that is either rated above a 4 by BA users or above a 90 in the BA score. Above a 90 indicates that a beer is either in the “Outstanding” or “World Class” categories.
## num/percent of beers > 4 rating
top_beers3 %>%
group_by(broad_style) %>%
summarize(n = n(),
n_over4 = sum(avg >= 4, na.rm = TRUE),
percent_over4 = round((n_over4 / n),2)) %>%
filter(!is.na(broad_style)) %>%
ggplot(aes(percent_over4, reorder(broad_style, percent_over4), fill = broad_style)) +
geom_col() +
theme_classic() +
theme(legend.position = "none",
text = element_text(size=9)) +
labs(y = "Beer Style", x = "Percent of Beers with an Average Rating >= 4",
title = "Percent of Beers with an Average Rating >= 4") +
scale_x_continuous(labels = scales::percent_format())

top_beers3 %>%
group_by(broad_style) %>%
summarize(n = n(),
n_over4 = sum(avg >= 4, na.rm = TRUE),
percent_over4 = round((n_over4 / n),2)) %>%
filter(!is.na(broad_style)) %>%
ggplot(aes(n_over4, reorder(broad_style, n_over4), fill = broad_style)) +
geom_col() +
theme_classic() +
theme(legend.position = "none",
text = element_text(size=9)) +
labs(y = "Beer Style", x = "Number of Beers with an Average Rating >= 4",
title = "Number of Beers with an Average Rating >= 4")

top_beers3 %>%
group_by(broad_style) %>%
summarize(n = n(),
n_over90 = sum(score >= 90, na.rm = TRUE),
percent_over90 = round((n_over90 / n),2)) %>%
filter(!is.na(broad_style)) %>%
ggplot(aes(percent_over90, reorder(broad_style, percent_over90), fill = broad_style)) +
geom_col() +
theme_classic() +
theme(legend.position = "none",
text = element_text(size=9)) +
labs(y = "Beer Style", x = "Percent of Beers with a BA Score >= 90",
title = "Percent of Beers by General Style with a BA Score >= 90") +
scale_x_continuous(labels = scales::percent_format())

Wild ales and lambics do well just about however you slice the data. Obviously the list of lambic brewers and blenders in the world is small, but it’s still remarkable to have over 60% of a style rated as either outstanding or world class.
The fact that the single highest number of amazing (4+ by reader ratings) IPAs speaks to the insane number of IPAs produced right now. And it’s no surprise that stouts and wild ales are the second and (distant) third-place styles – those are usually the kinds of beers we think of as hype whales.
Also of note: based on reader averages, high-ABV styles like quads, (many) stouts, barleywines, and (some) IPAs are at or near the top of the list of the most 4+ rated beers. Again, these are whale-ish beers, like Dark Lord, which costs $170 for just five bottles, Hunahpu or Pliny the Younger.
What about the distribution of reviews within a style? Using boxplots and the interquartile range, we can take a look at the most and least-divisive beer styles. The idea here is that beer ratings for some styles are more clustered together than others, while some styles have both very high and very low-rated beers.
## distribution of reviews
top_beers3 %>%
filter(!is.na(broad_style)) %>%
ggplot(aes(avg, reorder(broad_style, avg), color = broad_style)) +
#geom_jitter(aes(alpha = 0.3)) +
geom_boxplot() +
theme_classic() +
theme(legend.position = "none",
text = element_text(size=9)) +
labs(y = "Beer Style", x = "Avg Rating")

## Most divisive styles (within-style IQR)
top_beers3 %>%
group_by(broad_style) %>%
summarize(iqr = IQR(avg)) %>%
filter(!is.na(broad_style)) %>%
arrange(desc(iqr)) %>%
slice_max(order_by = iqr, n = 20) %>%
ggplot(aes(iqr, reorder(broad_style, iqr), color = broad_style)) +
geom_point() +
geom_segment(aes(x = 0, xend = iqr, y = broad_style, yend = broad_style)) +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none",
text = element_text(size=9)) +
labs(x = "Interquartile range", y = "Beer Style",
title = "Most Divisive Beer Styles")

## Most divisive substyles (within-style IQR)
top_beers3 %>%
group_by(beer_style) %>%
summarize(iqr = IQR(avg)) %>%
filter(!is.na(beer_style)) %>%
arrange(desc(iqr)) %>%
slice_max(order_by = iqr, n = 20) %>%
ggplot(aes(iqr, reorder(beer_style, iqr), color = beer_style)) +
geom_point() +
geom_segment(aes(x = 0, xend = iqr, y = beer_style, yend = beer_style)) +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
text = element_text(size=9),
legend.position = "none") +
labs(x = "Interquartile range", y = "Beer substyle",
title = "Most Divisive Beer Substyles")

# Least divisive substyles (within-style IQR)
top_beers3 %>%
group_by(beer_style) %>%
summarize(iqr = IQR(avg)) %>%
filter(!is.na(beer_style)) %>%
arrange(desc(iqr)) %>%
slice_min(order_by = iqr, n = 20) %>%
ggplot(aes(iqr, reorder(beer_style, -iqr), color = beer_style)) +
geom_point() +
geom_segment(aes(x = 0, xend = iqr, y = beer_style, yend = beer_style)) +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
text = element_text(size=9),
legend.position = "none") +
labs(x = "Interquartile range", y = "Beer substyle",
title = "Least Divisive Beer Substyles")

Some thoughts here:
As noted above, high-ABV beer styles are often highly-rated, but what about higher-ABV beers within a style?
## Relationships between things
## ABV vs. ratings (broad style and beer style)
top_styles <- top_beers3 %>%
count(broad_style, sort = TRUE) %>%
top_n(9)
top_beers3 %>%
filter(broad_style %in% top_styles$broad_style,
abv < 20) %>%
ggplot(aes(abv, avg)) +
geom_point(aes(alpha = 0.4, color = broad_style)) +
geom_smooth() +
facet_wrap(~broad_style, scales = "free_x") +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none") +
labs(y = "Avg Rating", x = "ABV", title = "ABV vs. Avg User Rating by General Style")

top_styles <- top_beers3 %>%
count(beer_style, sort = TRUE) %>%
top_n(9)
top_beers3 %>%
filter(abv < 20,
beer_style %in% top_styles$beer_style) %>%
ggplot(aes(abv, avg)) +
geom_point(aes(alpha = 0.4, color = beer_style)) +
geom_smooth() +
facet_wrap(~beer_style, scales = "free_x") +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none") +
labs(y = "Avg Rating", x = "ABV", title = "ABV vs. Avg User Rating by Substyle")

In general, for most of the top-9 styles, there’s not a strong relationship between ABV and average BA user rating. However, for stouts, wheat beers, and porters, there is a small positive correlation. Interestingly, for substyles, there’s not as strong of a relationship between ABV and rating for double IPAs as for imperial stouts.
## Ratings and Avg score
top_beers3 %>%
filter(beer_style %in% top_styles$beer_style) %>%
ggplot(aes(ratings, avg)) +
geom_point(aes(alpha = 0.4, color = beer_style)) +
geom_smooth() +
facet_wrap(~beer_style, scales = "free_x") +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none") +
labs(y = "Avg Rating", x = "Number of Ratings", title = "Number of Ratings vs. Avg User Rating by General Style")

## top breweries by style (brewery has to have 3 or more examples)
library(tidytext)
top_styles <- top_beers3 %>%
count(broad_style, sort = TRUE) %>%
top_n(9)
top_beers3 %>%
group_by(broad_style, brewery) %>%
summarize(n = n(),
avg_rating = round(mean(avg, na.rm = TRUE), 2)) %>%
filter(n > 2,
!is.na(broad_style),
broad_style %in% top_styles$broad_style) %>%
group_by(broad_style) %>%
slice_max(n = 10, order_by = avg_rating) %>%
ggplot(aes(avg_rating, reorder_within(brewery, avg_rating, broad_style),
color = broad_style)) +
geom_point() +
geom_segment(aes(x = 0, xend = avg_rating,
y = reorder_within(brewery, avg_rating, broad_style),
yend = reorder_within(brewery, avg_rating, broad_style))) +
facet_wrap(~broad_style, scales = "free_y") +
scale_y_reordered() +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
text = element_text(size=9),
legend.position = "none") +
labs(x = "Average Rating", y = "Brewery", title = "Top Breweries by Style")

top_styles <- top_beers3 %>%
count(beer_style, sort = TRUE) %>%
top_n(9)
top_beers3 %>%
group_by(beer_style, brewery) %>%
summarize(n = n(),
avg_rating = round(mean(avg, na.rm = TRUE), 2)) %>%
filter(n > 2,
!is.na(beer_style),
beer_style %in% top_styles$beer_style) %>%
group_by(beer_style) %>%
slice_max(n = 10, order_by = avg_rating) %>%
ggplot(aes(avg_rating, reorder_within(brewery, avg_rating, beer_style),
color = beer_style)) +
geom_point() +
geom_segment(aes(x = 0, xend = avg_rating,
y = reorder_within(brewery, avg_rating, beer_style),
yend = reorder_within(brewery, avg_rating, beer_style))) +
facet_wrap(~beer_style, scales = "free_y") +
scale_y_reordered() +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
text = element_text(size=7),
legend.position = "none") +
labs(x = "Average Rating", y = "Brewery", title = "Top Breweries by Substyle")

## Breweries that are awesome at multiple styles (best breweries in the country?)
## (number of styles that a brewery has a top 20 avg rating in)
top_beers3 %>%
group_by(broad_style, brewery) %>%
summarize(n = n(),
avg_rating = round(mean(avg, na.rm = TRUE), 2)) %>%
filter(n > 2) %>%
group_by(broad_style) %>%
slice_max(order_by = avg_rating, n = 20) %>%
ungroup() %>%
count(brewery, sort = TRUE) %>%
slice_max(order_by = n, n = 20) %>%
ggplot(aes(n, reorder(brewery,n), fill = brewery)) +
geom_col() +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
text = element_text(size=10),
legend.position = "none") +
labs(x = "Number of Styles w/ Top 20 Avg Rating", y = "Brewery",
title = "Breweries That Are Awesome at Multiple Styles")

## By score > Rating
top_beers3 %>%
group_by(broad_style, brewery) %>%
summarize(n = n(),
avg_rating = round(mean(score, na.rm = TRUE), 2)) %>%
filter(n > 2) %>%
group_by(broad_style) %>%
slice_max(order_by = avg_rating, n = 20) %>%
ungroup() %>%
count(brewery, sort = TRUE) %>%
slice_max(order_by = n, n = 20) %>%
ggplot(aes(n, reorder(brewery,n), fill = brewery)) +
geom_col() +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
text = element_text(size=10),
legend.position = "none") +
labs(x = "Number of Styles w/ Top 20 Avg Score", y = "Brewery",
title = "Breweries That Are Awesome at Multiple Styles")

top_beers3 %>%
group_by(beer_style, brewery) %>%
summarize(n = n(),
avg_rating = round(mean(avg, na.rm = TRUE), 2)) %>%
filter(n > 2) %>%
group_by(beer_style) %>%
slice_max(order_by = avg_rating, n = 10) %>%
ungroup() %>%
count(brewery, sort = TRUE) %>%
slice_max(order_by = n, n = 20) %>%
ggplot(aes(n, reorder(brewery,n), fill = brewery)) +
geom_col() +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
text = element_text(size=10),
legend.position = "none") +
labs(x = "Number of Beer Substyles w/ Top 20 Avg Rating", y = "Brewery",
title = "Breweries That Are Awesome at Multiple Substyles")

## relationship bet score and avg rating
top_styles <- top_beers3 %>%
count(broad_style, sort = TRUE) %>%
top_n(9)
top_beers3 %>%
filter(broad_style %in% top_styles$broad_style) %>%
ggplot(aes(x = score, y = avg, color = broad_style, alpha = 0.4)) +
geom_point() +
theme_classic() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none") +
facet_wrap(~ broad_style, scales = "free") +
labs(x = "Beer Advocate Score", y = "Avg User Rating",
title = "Comparison Between Beer Advocate Scores and User Ratings")

library(tigris)
library(sf)
library(leaflet)
us <- states(cb = TRUE)
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
drop_states <- c("Commonwealth of the Northern Mariana Islands",
"United States Virgin Islands",
"Alaska", "Hawaii", "American Samoa", "Guam", "Puerto Rico")
us2 <- us %>%
filter(!NAME %in% drop_states) %>%
st_set_crs("4326")
## top states per style
top_styles <- top_beers3 %>%
count(broad_style, sort = TRUE) %>%
top_n(9)
beer_state_styles <- top_beers3 %>%
group_by(broad_style, state) %>%
summarize(n_beers = n(),
n_over4 = sum(avg >= 4, na.rm = TRUE),
n_over90 = sum(score >= 90, na.rm = TRUE),
avg_rating = round(mean(avg, na.rm = TRUE), 2)) %>%
filter(broad_style %in% top_styles$broad_style) %>%
right_join(us2, by = c("state" = "NAME")) %>%
st_sf(sf_column_name = "geometry", crs = 4326)
maps <- map(top_styles$broad_style, ~ ggplot(beer_state_styles) +
geom_sf() +
geom_sf(data = beer_state_styles %>% filter(broad_style == .x),
aes(fill = n_over4)) +
scale_fill_viridis_b(direction = -1) +
theme_void() +
ggtitle(.x))
maps2 <- map(top_styles$broad_style, ~ ggplot(beer_state_styles) +
geom_sf() +
geom_sf(data = beer_state_styles %>% filter(broad_style == .x),
aes(fill = n_over90)) +
scale_fill_viridis_b(direction = -1) +
theme_void() +
ggtitle(.x))
ggsave(wrap_plots(maps), file = "maps.png", width = 16/1.2, height = 9/1.2)
ggsave(wrap_plots(maps2), file = "maps2.png", width = 16/1.2, height = 9/1.2)
States With the Most Beers with an Avg Rating > 4
States With the Most Beers with a BA Score > 90
## leaflet map - Pilsners
make_leaflet <- function(style) {
style_scores <- top_beers3 %>%
filter(broad_style == style) %>%
group_by(state) %>%
summarize(n_beers = n(),
n_over4 = sum(avg >= 4, na.rm = TRUE),
n_over90 = sum(score >= 90, na.rm = TRUE),
avg_rating = round(mean(avg, na.rm = TRUE), 2)) %>%
left_join(top_beers3 %>%
filter(broad_style == style) %>%
group_by(state) %>%
slice_max(order_by = avg, n = 1) %>%
mutate(name = paste(brewery, name, sep = ": ")) %>%
select(top_beer = name)) %>%
right_join(us2, by = c("state" = "NAME")) %>%
st_sf(sf_column_name = "geometry", crs = 4326)
pal <- colorBin("viridis", style_scores$n_over90, pretty = TRUE, reverse = TRUE)
leaflet(style_scores) %>%
addProviderTiles(providers$Stamen.TonerLite,
options = providerTileOptions(minZoom = 2, maxZoom = 5)) %>%
addPolygons(fillColor = ~ pal(n_over90),
weight = 0.5, opacity = 1,
color = "black",
fillOpacity = 0.5, smoothFactor = 0.5,
label = paste0("N reviews > 4 = ", style_scores$n_over4, ", ",
"N scores > 90 = ", style_scores$n_over90, ", ",
"Avg Rating = ", style_scores$avg_rating, ", ",
"Top beer: ", style_scores$top_beer)) %>%
setView(-98.5795, 39.8282, zoom=3)
}
make_leaflet("Pilsner")
Best place for an IPA:
make_leaflet("IPA")